# coding:utf8
import findspark
findspark.init()
# 导入Spark的相关包
from pyspark import SparkConf,SparkContext
if __name__ == '__main__':
    # 0. Initialize the execution environment: build the SparkContext object.
    conf = SparkConf().setAppName("test").setMaster("local[*]")
    sc = SparkContext(conf=conf)
    try:
        # Read HDFS file data for testing.
        # wholeTextFiles yields (file_path, file_content) pairs, one per file.
        hdfs_rdd = sc.wholeTextFiles("hdfs://bigdata:9820/pySpark_input/words.txt")
        # Cache the RDD: three actions follow (two collect()s and
        # getNumPartitions()); without caching each action would re-read
        # the file from HDFS.
        hdfs_rdd.cache()
        # File contents only (drop the path element of each pair).
        print(hdfs_rdd.map(lambda x: x[1]).collect())
        # Full (path, content) pairs.
        print("hdfs_rdd 内容:", hdfs_rdd.collect())
        # Number of partitions the input was split into.
        print(hdfs_rdd.getNumPartitions())
    finally:
        # Always release driver/executor resources, even if an action fails.
        sc.stop()